package com.lean.reptile.spider.sp;

import com.lean.reptile.spider.entity.Novel;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.util.ArrayList;
import java.util.List;

/**
 * WebMagic {@link PageProcessor} for book.zongheng.com.
 *
 * <p>Every request is expected to carry a {@link #TYPE} extra that says whether the
 * downloaded page is a store list page ({@link #LIST_TYPE}) or a book detail page
 * ({@link #DETL_TYPE}). List pages produce a {@code novelList} field and schedule one
 * detail request per book; detail pages produce a {@code novelDTO} field.
 *
 * <p>Parsing is tolerant of markup drift: a missing element yields a {@code null}
 * property on the resulting {@link Novel} instead of aborting the whole page.
 */
public class ZonghengProcessor implements PageProcessor {

    /** Key of the request extra that carries the page type. */
    public static final String TYPE = "type";
    /** Page type value for store list pages. */
    public static final String LIST_TYPE = "list";
    /** Page type value for book detail pages. */
    public static final String DETL_TYPE = "detl";

    private static final String DETAIL_URL_PREFIX = "http://book.zongheng.com/book/";
    private static final String DETAIL_URL_SUFFIX = ".html";
    /** Label that precedes the timestamp inside the "last updated" span of a list entry. */
    private static final String UPDATE_TIME_LABEL = "更新时间";
    /** XPath (relative to a list entry) of the block holding author, tag, status and update time. */
    private static final String BOOK_LINKS = "/div/div[@class='bookinfo']/div[@class='bookilnk']";

    // Configured once; getSite() only hands out this instance and no longer mutates it.
    private final Site site = buildSite();

    /**
     * Dispatches the page to the list or detail handler based on the {@link #TYPE} extra.
     * Pages without a type are skipped so that no empty result reaches the pipelines;
     * unknown types are ignored, as before.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        Object typeExtra = page.getRequest().getExtra(TYPE);
        if (typeExtra == null) {
            // Previously this dereferenced null and failed with a NullPointerException.
            page.setSkip(true);
            return;
        }

        switch (typeExtra.toString()) {
            case LIST_TYPE:
                processList(page);
                break;
            case DETL_TYPE:
                processDetl(page);
                break;
            default:
                break;
        }
    }

    /**
     * Handles a store list page: extracts one {@link Novel} per book entry, stores them
     * under the {@code novelList} field and schedules the detail page of every book.
     * Entries without a link, or whose link does not contain a numeric id, are skipped.
     *
     * @param page the list page
     */
    private void processList(Page page) {
        Html html = page.getHtml();
        List<Selectable> bookInfoNodes = html.xpath("//div[@class='store_collist']/div").nodes();
        List<Novel> novelList = new ArrayList<>();
        for (Selectable node : bookInfoNodes) {
            String novelUrl = node.xpath("/div/div[@class='bookimg']/a/@href").toString();
            if (StringUtils.isEmpty(novelUrl)) {
                continue;
            }
            String idText = extractBookIdText(novelUrl);
            Long id = parseLongOrNull(idText);
            if (id == null) {
                // Not a book link we understand; skip this entry instead of failing the page.
                continue;
            }

            // Anchors are [author, tag]; spans are [status, update time]. Each XPath is evaluated once.
            List<String> linkTexts = node.xpath(BOOK_LINKS + "/a/text()").all();
            List<String> spanTexts = node.xpath(BOOK_LINKS + "/span/text()").all();

            Novel novel = new Novel();
            novel.setId(id);
            novel.setName(node.xpath("/div/div[@class='bookinfo']/div[@class='bookname']/a/text()").toString());
            novel.setUrl(novelUrl);
            novel.setAuthor(elementAt(linkTexts, 0));
            novel.setTags(elementAt(linkTexts, 1));
            novel.setLastUpdateTime(textAfterUpdateLabel(elementAt(spanTexts, 1)));
            novel.setStatus(elementAt(spanTexts, 0));
            novel.setDesc(node.xpath("/div/div[@class='bookinfo']/div[@class='bookintro']/text()").toString());
            // Latest chapter title.
            novel.setChapter(node.xpath("/div/div[@class='bookinfo']/div[@class='bookupdate']/a/text()").toString());
            novel.setImgUrl(node.xpath("/div/div[@class='bookimg']/a/img/@src").toString());
            novelList.add(novel);

            // Queue the detail page of this book with the scheduler.
            Request detlRequest = new Request(DETAIL_URL_PREFIX + idText + DETAIL_URL_SUFFIX);
            detlRequest.putExtra(TYPE, DETL_TYPE);
            page.addTargetRequest(detlRequest);
        }
        page.putField("novelList", novelList);
    }

    /**
     * Handles a book detail page: reads the four counters and the introduction and stores
     * them, keyed by the book id taken from the request URL, under the {@code novelDTO} field.
     * The page is skipped when the URL does not contain a numeric book id.
     *
     * @param page the detail page
     */
    private void processDetl(Page page) {
        Long id = parseLongOrNull(extractBookIdText(page.getRequest().getUrl()));
        if (id == null) {
            // Without an id the result cannot be matched to a book; do not emit a partial record.
            page.setSkip(true);
            return;
        }

        Html html = page.getHtml();
        List<Selectable> counters = html.xpath("//div[@class='book-detail clearfix']/div[@class='book-info']/div[@class='nums']/span").nodes();
        String intro = html.xpath("//div[@class='book-detail clearfix']/div[@class='book-info']/div[@class='book-dec Jbook-dec hide']/p/text()").toString();

        Novel novel = new Novel();
        novel.setId(id);
        novel.setDesc(intro);
        // Counter order on the page: word count, total recommendations, total clicks, weekly recommendations.
        novel.setWordNumber(counterText(counters, 0));
        novel.setToatlRecommendNumber(counterText(counters, 1));
        novel.setTotalClickCount(counterText(counters, 2));
        novel.setWeekRecommendNumber(counterText(counters, 3));
        page.putField("novelDTO", novel);
    }

    /**
     * Returns the crawler configuration (retry/sleep/timeout settings and request headers).
     *
     * @return the shared {@link Site} instance of this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /** Builds the site configuration, including the browser-like request headers. */
    private static Site buildSite() {
        // NOTE(review): the Cookie below is a captured browser session and is likely stale — confirm it is still needed.
        return Site.me().setRetryTimes(0).setSleepTime(2000).setTimeOut(60000)
                .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .addHeader("Accept-Encoding", "gzip, deflate, br")
                .addHeader("Accept-Language", "zh-CN,zh;q=0.9")
                .addHeader("Cache-Control", "max-age=0")
                .addHeader("Connection", "keep-alive")
                .addHeader("Cookie", "ZHID=2AAC2A85EB1D1351C2F61EB41A4C53A8; ver=2018; zh_visitTime=1652247339783; zhffr=www.baidu.com; sajssdk_2015_cross_new_user=1; Hm_lvt_c202865d524849216eea846069349eb9=1652247340; v_user=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D0i75y3UHv-Ol5beS7oXwoCzE_I-VTaOycWhefCEyW_6x1-V53-MF9GRAzd3apXVB%26wd%3D%26eqid%3De48d3dcf00034fcf00000006627b4ae8%7Chttp%3A%2F%2Fwww.zongheng.com%2F%7C55526177; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22180b19da3235ee-079940e37d5c3b-1f343371-2073600-180b19da324192%22%2C%22%24device_id%22%3A%22180b19da3235ee-079940e37d5c3b-1f343371-2073600-180b19da324192%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; Hm_lpvt_c202865d524849216eea846069349eb9=1652248703")
                .addHeader("Host", "book.zongheng.com")
                .addHeader("Referer", "http://book.zongheng.com/store/c0/c0/b0/u0/p1/v9/s9/t0/u0/i1/ALL.html")
                .addHeader("Upgrade-Insecure-Requests", "1")
                .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36");
    }

    /**
     * Extracts the id part of a book URL: the last path segment without its extension,
     * e.g. {@code 123} for {@code http://book.zongheng.com/book/123.html}.
     *
     * @return the id text, or {@code null} when {@code url} is {@code null}
     */
    private static String extractBookIdText(String url) {
        if (url == null) {
            return null;
        }
        String fileName = url.substring(url.lastIndexOf('/') + 1);
        int dot = fileName.lastIndexOf('.');
        // A URL without an extension used to fail with StringIndexOutOfBoundsException.
        return dot >= 0 ? fileName.substring(0, dot) : fileName;
    }

    /** Parses {@code text} as a {@link Long}, returning {@code null} when it is absent or not numeric. */
    private static Long parseLongOrNull(String text) {
        if (text == null) {
            return null;
        }
        try {
            return Long.valueOf(text);
        } catch (NumberFormatException notNumeric) {
            return null;
        }
    }

    /** Returns {@code values.get(index)}, or {@code null} when the list is too short. */
    private static String elementAt(List<String> values, int index) {
        return index < values.size() ? values.get(index) : null;
    }

    /**
     * Returns the text that follows the update-time label, or {@code null} when the
     * span is missing or does not contain anything after the label.
     */
    private static String textAfterUpdateLabel(String spanText) {
        if (spanText == null) {
            return null;
        }
        String[] parts = spanText.split(UPDATE_TIME_LABEL);
        return parts.length > 1 ? parts[1] : null;
    }

    /** Returns the number shown in the counter span at {@code index}, or {@code null} when absent. */
    private static String counterText(List<Selectable> counters, int index) {
        return index < counters.size() ? counters.get(index).xpath("/span/i/text()").toString() : null;
    }
}
